---
title: "gov2020-repl-project"
format: pdf
editor: visual
---

## Packages

```{r}

library(tidyverse)
library(broom)
library(estimatr)
library(margins)
library(lme4)

# remotes::install_github("markwestcott34/stargazer-booktabs")
library(stargazer)

```

## Import Data

```{r}

panel_year <- read.csv("panel-year-gov2020.csv")

```

## New Variables

```{r}

# Function to check if of age during a conflict
of_age_when <- function(conflict_years, birth_year, age_range = 18:40) {
  min_year <- min(conflict_years)
  max_year <- max(conflict_years)
  birth_age_min <- min_year - birth_year
  birth_age_max <- max_year - birth_year
  return(birth_age_min %in% age_range | birth_age_max %in% age_range)
}

# Function to create combat instrument
create_combat_instrument <- function(cohort, combat_str, combat_pattern) {
  cohort * (str_detect(combat_str, combat_pattern))
}

# Refactored cohort_frame
cohort_frame <- panel_year %>%
  mutate(
    promotion_after_1949 = ifelse(!is.na(shangjiang_year), 1, 0),
    promotion_after_1988 = ifelse(is.na(shangjiang_year) | shangjiang_year < 1988, 0, 1),
    promotion_before_1955 = ifelse(is.na(shangjiang_year) | shangjiang_year > 1955, 0, 1),
    
    long_march_cohort = of_age_when(1934:1935, birth_year, age_range = 16:40),
    combat_japan_cohort = of_age_when(1937:1945, birth_year, age_range = 16:40),
    combat_communist_cohort = of_age_when(1946:1950, birth_year, age_range = 16:40),
    combat_korea_cohort = of_age_when(1950:1953, birth_year),
    combat_vietnam_cohort = of_age_when(1964:1979, birth_year),
    combat_india_cohort = of_age_when(1962, birth_year),
    
    long_march_subset = of_age_when(1934:1935, birth_year, 30:50),
    combat_korea_subset = of_age_when(1950:1953, birth_year, 30:50),
    combat_vietnam_subset = of_age_when(1964:1979, birth_year, 30:50),
    combat_communist_subset = of_age_when(1946:1950, birth_year, 30:50),
    combat_japan_subset = of_age_when(1937:1945, birth_year, 30:50),
    combat_india_subset = of_age_when(1962, birth_year, 30:50),
    
    fought_korea = str_detect(combat, "Korea|Korean"),
    fought_vietnam = str_detect(combat, "Vietnam|Vietnamese"),
    fought_communist = str_detect(combat, "Communist"),
    fought_japan = str_detect(combat, "Japan|Japanese"),
    fought_india = str_detect(combat, "India|Indian"),
    
    last_conflict_year = case_when(
      fought_vietnam ~ 1975,
      fought_korea ~ 1953,
      fought_india ~ 1962,
      fought_communist ~ 1950,
      fought_japan ~ 1945,
      TRUE ~ NA_real_
    ),
    
    age_when_shangjiang = shangjiang_year - birth_year,
    
    missed_all_eligibility = as.integer(
      !(birth_year %in% c(1904:1920, 1920:1935, 1949:1961, 1907:1929, 1900:1922, 1919:1939))
    ),
    any_combat = fought_korea + fought_vietnam + fought_india + fought_communist + fought_japan + participated_long_march,
    combat_pre_1955 = fought_korea + fought_japan + fought_communist + participated_long_march,
    combat_post_1955 = fought_vietnam + fought_india
  ) %>%
  mutate(
    pre_prc_cohort = as.integer(birth_year %in% 1894:1933),
    post_prc_cohort = as.integer(birth_year %in% 1910:1959)
  ) 

```

## Restructure Data

```{r}

df <- cohort_frame %>% 
  dplyr::select(name, unique_id, birth_year, general_rank, promotion_after_1949, any_combat, combat_pre_1955, combat_post_1949, promotion_before_1955, promotion_after_1988, ends_with(c("instrument")), participated_long_march, starts_with(c("fought")), ends_with(c("cohort")), ends_with(c("subset")), cmc_chair_connection, shangjiang_year, age_when_shangjiang, education_level, starts_with(c("parent_")), birth_prefecture, birth_province, ends_with(".network"), combat_post_1955, cohort_decade) %>% 
  distinct()

standardise <- function(x){
  return((x - mean(x, na.rm = T))/sd(x))
}

df %<>% group_by(cohort_decade) %>%
  mutate(
    std_com_pre = standardise(combat_pre_1955),
    std_com_post = standardise(any_combat)
  )
```

## Base Model

```{r}

# Promotion to general ~ combat before and after 1949

m1 <- lm(promotion_after_1949 ~ any_combat, data = df)

summary(m1)

m2 <- lm(promotion_before_1955 ~ combat_pre_1955 + as.factor(cohort_decade), data = df)

summary(m2)

m3 <- lm(promotion_after_1988 ~ any_combat  + as.factor(cohort_decade), data = df %>% filter(!promotion_before_1955 == 1))

summary(m3)

```

```{r, eval = F}

tidy_m2 <- tidy(m2, conf.int = TRUE)
tidy_m3 <- tidy(m3, conf.int = TRUE)

tidy_m2 <- tidy_m2 %>% mutate(model = "Promotion Before 1955")
tidy_m3 <- tidy_m3 %>% mutate(model = "Promotion After 1988")

tidy_frame <- tidy_m2 %>% 
  bind_rows(tidy_m3) %>% 
  filter(term != "(Intercept)") %>% 
  mutate(model = factor(model, levels = c("Promotion Before 1955", "Promotion After 1988")))

ggplot(tidy_frame, aes(x = model, y = estimate, ymin = conf.low, ymax = conf.high)) +
  geom_hline(yintercept = 0, lty = "dashed", color = "darkgrey") +
  geom_pointrange() +
  labs(
    title = "Coefficient Plot for Promotion Models",
    x = "Model",
    y = "Log Odds (Coefficient)"
  ) +
  theme_bw() +
  scale_y_continuous(limits = c(-2, 2)) +
  labs(y = "Promotion Probability (Log Odds)",
       x = NULL,
       title = "")

ggsave(filename = "output/base_logodds.pdf", width = 6, height = 4)

mar1 <- margins(m2) %>% summary
mar2 <- margins(m3) %>% summary

t <- rbind(mar1, mar2) %>% 
  filter(factor %in% c("combat_pre_1955", "any_combat")) %>%
  mutate(`factor` = `factor` %>% factor(., levels = .))

ggplot(t, aes(x = factor, y = AME, ymin = lower, ymax = upper)) + geom_hline(yintercept = 0, lty = "dashed", color = "darkgrey") +
  geom_pointrange() +
  scale_x_discrete(labels = c("Pre-", "Post-")) +
  scale_y_continuous(labels = scales::percent_format(suffix = "pp")) +
  labs(
    title = "Marginal Effect of Combat Experience on Promotion Probability",
    x = "State Consolidation",
    y = NULL
  ) +
  theme_bw()
  # scale_y_continuous(limits = c(-2, 2)) +

ggsave(filename = "output/base_ames.pdf", height = 4, width = 6)

```

## Expanded Model 0A (+Birth cohort)

```{r}
m2a <- glm(promotion_before_1955 ~ combat_pre_1955 + as.factor(cohort_decade), family = "binomial", data = df)

summary(m2a)


m2aa <- glmer(promotion_before_1955 ~ combat_pre_1955 + (combat_pre_1955  - 1 | cohort_decade), family = "binomial", data = df)

summary(m2aa)

m3a <- glm(promotion_after_1988 ~ combat_pre_1955 + combat_post_1955 + as.factor(cohort_decade), family = "binomial", data = df %>% filter(promotion_before_1955 != 1))

summary(m3a)

m3aa <- glmer(promotion_after_1988 ~ combat_pre_1955 + combat_post_1955 + ( combat_pre_1955 + combat_post_1955 - 1 | cohort_decade), family = "binomial", data = df)

summary(m3aa)

mar1a <- margins(m2a) %>% summary
mar2a <- margins(m3a) %>% summary



```

## Expanded Model 1 (+ education + parentCCP)

```{r}
m4 <-  glm(promotion_after_1949 ~ any_combat + education_level + parent_CCP_leader, data = df, family = binomial(link = "logit"))

summary(m4)

m5 <- glm(promotion_before_1955 ~ combat_pre_1955 + education_level + parent_CCP_leader , data = df, family = binomial(link = "logit"))

summary(m5)

m6 <- glm(promotion_after_1988 ~  combat_pre_1955 + combat_post_1955 + education_level + parent_CCP_leader, data = df %>% filter(promotion_before_1955 != 1), family = binomial(link = "logit"))

summary(m6)


m4a <-  glm(promotion_after_1949 ~ any_combat + education_level + parent_CCP_leader + as.factor(cohort_decade) , data = df, family = binomial(link = "logit"))


m4aa <-  glmer(promotion_after_1949 ~ any_combat + education_level + parent_CCP_leader + (any_combat | cohort_decade) , data = df, family = binomial(link = "logit"))


m5a <- glm(promotion_before_1955 ~ combat_pre_1955 + education_level + parent_CCP_leader  + as.factor(cohort_decade), data = df, family = binomial(link = "logit"))

m5aa <-  glmer(promotion_before_1955 ~ combat_pre_1955 + education_level + parent_CCP_leader + (combat_pre_1955 - 1 | cohort_decade) , data = df, family = binomial(link = "logit"))

m6a <- glm(promotion_after_1988 ~  combat_pre_1955 + combat_post_1955 + education_level + parent_CCP_leader + as.factor(cohort_decade), data = df %>% filter(promotion_before_1955 != 1), family = binomial(link = "logit"))

m6aa <- glmer(promotion_after_1988 ~  combat_pre_1955 + combat_post_1955 + education_level + parent_CCP_leader + (combat_pre_1955 + combat_post_1955 - 1 | cohort_decade), data = df %>% filter(promotion_before_1955 != 1), family = binomial(link = "logit"))

```

## Expanded Model 2 (+ birth province FE)

```{r}
m7 <-  glm(promotion_after_1949 ~ any_combat + education_level + parent_CCP_leader + as.factor(birth_province), data = df, family = binomial(link = "logit"))

summary(m7)

m8 <- glm(promotion_before_1955 ~ combat_pre_1955 + education_level + parent_CCP_leader + as.factor(birth_province), data = df, family = binomial(link = "logit"))

summary(m8)

m9 <- glm(promotion_after_1988 ~  combat_pre_1955 + combat_post_1955 + education_level + parent_CCP_leader + as.factor(birth_province), data = df %>% filter(promotion_before_1955 != 1), family = binomial(link = "logit"))

summary(m9)


m7a <-  glm(promotion_after_1949 ~ any_combat + education_level + parent_CCP_leader + as.factor(birth_province) + as.factor(cohort_decade), data = df, family = binomial(link = "logit"))

summary(m7)

m8a <- glm(promotion_before_1955 ~ combat_pre_1955 + education_level + parent_CCP_leader + as.factor(birth_province) + as.factor(cohort_decade), data = df, family = binomial(link = "logit"))

summary(m8)

m9a <- glm(promotion_after_1988 ~  combat_pre_1955 + combat_post_1955 + education_level + parent_CCP_leader + as.factor(birth_province) + as.factor(cohort_decade), data = df %>% filter(promotion_before_1955 != 1), family = binomial(link = "logit"))

summary(m9)

m8aa <- glmer(promotion_before_1955 ~ combat_pre_1955 + education_level + parent_CCP_leader + as.factor(birth_province) + (combat_pre_1955 - 1 | cohort_decade), data = df, family = binomial(link = "logit"))

summary(m8)

m9aa <- glmer(promotion_after_1988 ~  combat_pre_1955 + combat_post_1955 + education_level + parent_CCP_leader + as.factor(birth_province) + (combat_pre_1955 + combat_post_1955 - 1 | cohort_decade), data = df %>% filter(promotion_before_1955 != 1), family = binomial(link = "logit"))

summary(m9)


```

## Expanded Model 3 (+ network)

```{r}
m10 <-  glm(promotion_after_1949 ~ any_combat + education_level + parent_CCP_leader + as.factor(birth_province), data = df, family = binomial(link = "logit"))

summary(m10)

m11 <- glm(promotion_before_1955 ~ combat_pre_1955 + education_level + parent_CCP_leader + deng.network + jiang.network + hu.network + xi.network + as.factor(birth_province) , data = df, family = binomial(link = "logit"))

summary(m11)

m12 <- glm(promotion_after_1988 ~  combat_pre_1955 + combat_post_1955 + education_level + parent_CCP_leader + deng.network + jiang.network + hu.network + xi.network + as.factor(birth_province) , data = df %>% filter(promotion_before_1955 != 1), family = binomial(link = "logit"))

summary(m12)

m10a <-  glm(promotion_after_1949 ~ any_combat + education_level + parent_CCP_leader + as.factor(birth_province) + as.factor(cohort_decade), data = df, family = binomial(link = "logit"))

# summary(m10)

m11a <- glm(promotion_before_1955 ~ combat_pre_1955 + education_level + parent_CCP_leader + deng.network + jiang.network + hu.network + xi.network + as.factor(birth_province) + as.factor(cohort_decade), data = df, family = binomial(link = "logit"))

# summary(m11)

m12a <- glm(promotion_after_1988 ~  combat_pre_1955 + combat_post_1955 + education_level + parent_CCP_leader + deng.network + jiang.network + hu.network + xi.network + as.factor(birth_province) + as.factor(cohort_decade), data = df %>% filter(promotion_before_1955 != 1), family = binomial(link = "logit"))

# summary(m12)

m11aa <- glmer(promotion_before_1955 ~ combat_pre_1955 + education_level + parent_CCP_leader + deng.network + jiang.network + hu.network + xi.network + as.factor(birth_province) + (combat_pre_1955 + combat_post_1955 - 1 | cohort_decade), data = df, family = binomial(link = "logit"))

# summary(m11)

m12aa <- glmer(promotion_after_1988 ~  combat_pre_1955 + combat_post_1955 + education_level + parent_CCP_leader + deng.network + jiang.network + hu.network + xi.network + as.factor(birth_province) + (combat_pre_1955 + combat_post_1955 - 1 | cohort_decade), data = df %>% filter(promotion_before_1955 != 1), family = binomial(link = "logit"))

# summary(m12)
```

## Export regression table (1)

```{r, eval = F}
stargazer(m2, m3, m5, m6, m8, m9, m11, m12,
          header = FALSE,
          omit = c("birth_province", ".network"), type = "latex",
          dep.var.labels = rep(c("Pre-1955", "1988+"),4),
          add.lines=list(#c("Birth decade fixed effects", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$"),
                         c("Birth province fixed effects", "", "", "", "", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$"),
                         c("Leader networks", "", "", "", "", "", "", "$\\checkmark$", "$\\checkmark$")),
          covariate.labels = c("Pre-1955 Combat exp.", "Post-1955 Combat exp.",
                               "Education", "Parental CCP history"),
          label = "tab:logit-regs",
          title = "Logit Regressions on Pre- and Post-Statebuilding Promotions",
          out = "output/logit-reg.tex")
```

## Export regression table (2)

```{r}
stargazer(m2a, m3a, m5a, m6a, m8a, m9a, m11a, m12a,
          header = FALSE,
          omit = c("birth_province", ".network", "_decade"), type = "latex",
          dep.var.labels = rep(c("Pre-1955", "1988+"),4),
          add.lines=list(c("Birth decade fixed effects", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$"),
                         c("Birth province fixed effects", "", "", "", "", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$"),
                         c("Leader networks", "", "", "", "", "", "", "$\\checkmark$", "$\\checkmark$")),
          covariate.labels = c("Pre-1955 Combat exp.", "Post-1955 Combat exp.",
                               "Education", "Parental CCP history"),
          label = "tab:logit-regs",
          title = "Logit Regressions on Pre- and Post-Statebuilding Promotions",
          out = "output/logit-regs-birthcohort.tex"
          )
```

## Export regression table (3)

```{r}
stargazer(m2aa, m3aa, m5aa, m6aa, m8aa, m9aa, m11aa, m12aa,
          header = FALSE,
          omit = c("birth_province", ".network", "_decade"), type = "latex",
          dep.var.labels = rep(c("Pre-1955", "1988+"),4),
          add.lines=list(c("Birth decade fixed effects", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$"),
                         c("Birth province fixed effects", "", "", "", "", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$"),
                         c("Leader networks", "", "", "", "", "", "", "$\\checkmark$", "$\\checkmark$")),
          covariate.labels = c("Pre-1955 Combat exp.", "Post-1955 Combat exp.",
                               "Education", "Parental CCP history"),
          label = "tab:logit-regs",
          title = "Logit Regressions on Pre- and Post-Statebuilding Promotions",
          out = "output/logit-regs-birthcohort-partial-pooling.tex"
          )
```

## Expanded Model 4

```{r}

m13 <- glm(promotion_before_1955 ~ participated_long_march, data = df, family = binomial(link = "logit"))

summary(m13)

m14 <- glm(promotion_before_1955 ~ fought_japan, data = df, family = binomial(link = "logit"))

summary(m14)

m15 <- glm(promotion_before_1955 ~ fought_communist, data = df, family = binomial(link = "logit"))

summary(m15)

m16 <- glm(promotion_before_1955 ~ fought_korea, data = df, family = binomial(link = "logit"))

summary(m16)

tidy_m13 <- tidy(m13, conf.int = TRUE)
tidy_m14 <- tidy(m14, conf.int = TRUE)
tidy_m15 <- tidy(m15, conf.int = TRUE)
tidy_m16 <- tidy(m16, conf.int = TRUE)


tidy_m13 <- tidy_m13 %>% mutate(model = "Fought Long March")
tidy_m14 <- tidy_m14 %>% mutate(model = "Fought Japan")
tidy_m15 <- tidy_m15 %>% mutate(model = "Fought Civil War")
tidy_m16 <- tidy_m16 %>% mutate(model = "Fought Korea")


tidy_frame <- tidy_m13 %>% 
  bind_rows(tidy_m14) %>%
  bind_rows(tidy_m15) %>% 
  bind_rows(tidy_m16) %>% 
  filter(term != "(Intercept)") %>% 
  mutate(model = factor(model, levels = c("Fought Long March", "Fought Japan",
                                          "Fought Civil War", "Fought Korea")))


ggplot(tidy_frame, aes(x = model, y = estimate, ymin = conf.low, ymax = conf.high)) +
  geom_hline(yintercept = 0, lty = "dashed", color = "darkgrey") +
  geom_pointrange() +
  labs(
    title = "Coefficient Plot for Promotion Models",
    x = "Model",
    y = "Log Odds (Coefficient)"
  ) +
  theme_minimal() +
  labs(y = "Promotion Probability (Log Odds)",
       x = NULL,
       title = "")

```

## IV Approach

```{r}

iv_pre <- iv_robust(promotion_before_1955 ~ combat_pre_1955 | 
                                pre_prc_cohort, 
                              data = df, diagnostics = TRUE)

summary(iv_pre)

iv_post <- iv_robust(promotion_after_1988 ~ combat_post_1955 + combat_pre_1955 | 
                                post_prc_cohort + combat_pre_1955, 
                              data = df, diagnostics = TRUE)

summary(iv_post)

library(broom)

coef_pre <- tidy(iv_pre, conf.int = TRUE)[2, ]
coef_post <- tidy(iv_post, conf.int = TRUE)[2, ]

coefficients <- coef_pre %>% 
  bind_rows(coef_post) 

coefficients$model <- factor(c("Pre-PRC", "Post-PRC"), levels = c("Pre-PRC", "Post-PRC"))

ggplot(coefficients, aes(x = model, y = estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray") +
  theme_minimal() +
  labs(
    title = "2SLS Coefficient Plots",
    x = "Combat experience in __________",
    y = "Changes in probability of promotion post-1949"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

```

## IV Regression (Rank \~ Fight \| Eligible) (Promotions in 1955)

### Long March

```{r}

iv_model_long_march_1 <- iv_robust(promotion_before_1955 ~ participated_long_march | 
                                long_march_cohort, 
                              data = df, diagnostics = TRUE)

summary(iv_model_long_march)

# first_stage_long_march <- lm(combat_post_1949 ~ long_march_instrument + age + cmc_chair_connection, data = df)
# 
# summary(first_stage_long_march)

```

### Sino-Japanese War

```{r}

iv_model_japan_war_1 <- iv_robust(promotion_before_1955 ~ fought_japan + participated_long_march | combat_japan_cohort + participated_long_march, 
                              data = df, diagnostics = TRUE)

summary(iv_model_japan_war)

# first_stage_japan_war <- lm(combat_post_1949 ~ combat_japan_instrument + long_march_instrument + age + cmc_chair_connection, data = df)

# summary(first_stage_japan_war)



```

### Communist Civil War

```{r}

iv_model_civil_war_1 <- iv_robust(promotion_before_1955 ~  fought_communist + participated_long_march + fought_japan | 
                              combat_communist_cohort + participated_long_march + fought_japan, 
                              data = df, diagnostics = TRUE)

summary(iv_model_civil_war)
# 
# first_stage_civil_war <- lm(combat_post_1949 ~ combat_communist_instrument + combat_japan_instrument + long_march_instrument + age + cmc_chair_connection, data = df)
# 
# summary(first_stage_civil_war)

```

### Korean War

```{r}

iv_model_korea_1 <- iv_robust(promotion_before_1955 ~ fought_korea + fought_communist + fought_japan + participated_long_march | combat_korea_cohort + fought_communist + fought_japan + participated_long_march, 
                        data = df, diagnostics = TRUE)

summary(iv_model_korea)

# first_stage_korea <- lm(combat_post_1949 ~ combat_korea_instrument + combat_japan_instrument + combat_communist_instrument + long_march_instrument + age, data = df)
# 
# summary(first_stage_korea)


```

```{r}

library(broom)

coef_long_march <- tidy(iv_model_long_march_1, conf.int = TRUE)[2, ]
coef_japan_war <- tidy(iv_model_japan_war_1, conf.int = TRUE)[2, ]
coef_civil_war <- tidy(iv_model_civil_war_1, conf.int = TRUE)[2, ]
coef_korea <- tidy(iv_model_korea_1, conf.int = TRUE)[2, ]
# coef_india <- tidy(iv_model_india, conf.int = TRUE)[2, ]
# coef_vietnam <- tidy(iv_model_vietnam, conf.int = TRUE)[2, ]

coefficients_1 <- coef_long_march %>% 
  bind_rows(coef_japan_war) %>% 
  bind_rows(coef_civil_war) %>% 
  bind_rows(coef_korea) # %>%
  # bind_rows(coef_india) %>% 
  # bind_rows(coef_vietnam)

coefficients_1$model <- factor(c("Long March", "Sino-Japanese War", "Civil War" ,"Korean War"), levels = c("Long March", "Sino-Japanese War", "Civil War" ,"Korean War"))

plot55 <- coefficients_1 %>% 
  filter(!model %in% c("Sino-India War", "Vietnam War")) %>% 
  ggplot(aes(x = model, y = estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.1) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(
    title = "Pre-consolidation (1955)",
    x = NULL,
    y = "Change in Probability of Promotion"
  ) +
  theme_bw() +
    scale_y_continuous(limits = c(-1, .5)) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5))

```

## IV Regression (Rank \~ Fight \| Eligible) (Promotions post 1988)

### Long March

```{r}

iv_model_long_march_2 <- iv_robust(promotion_after_1988 ~ participated_long_march | 
                                long_march_cohort, 
                              data = df %>% filter(promotion_before_1955 != 1), diagnostics = TRUE)

summary(iv_model_long_march)

# first_stage_long_march <- lm(combat_post_1949 ~ long_march_instrument + age + cmc_chair_connection, data = df)
# 
# summary(first_stage_long_march)

```

### Sino-Japanese War

```{r}

iv_model_japan_war_2 <- iv_robust(promotion_after_1988 ~ fought_japan + participated_long_march | combat_japan_cohort + participated_long_march, 
                              data = df %>% filter(promotion_before_1955 != 1), diagnostics = TRUE)

summary(iv_model_japan_war)

# first_stage_japan_war <- lm(combat_post_1949 ~ combat_japan_instrument + long_march_instrument + age + cmc_chair_connection, data = df)

# summary(first_stage_japan_war)



```

### Communist Civil War

```{r}

iv_model_civil_war_2 <- iv_robust(promotion_after_1988 ~  fought_communist + participated_long_march + fought_japan | 
                              combat_communist_cohort + participated_long_march + fought_japan, 
                              data = df %>% filter(promotion_before_1955 != 1), diagnostics = TRUE)

summary(iv_model_civil_war)
# 
# first_stage_civil_war <- lm(combat_post_1949 ~ combat_communist_instrument + combat_japan_instrument + long_march_instrument + age + cmc_chair_connection, data = df)
# 
# summary(first_stage_civil_war)

```

### Korean War

```{r}

iv_model_korea_2 <- iv_robust(promotion_after_1988 ~ fought_korea + fought_communist + fought_japan + participated_long_march | combat_korea_cohort + fought_communist + fought_japan + participated_long_march, 
                        data = df %>% filter(promotion_before_1955 != 1), diagnostics = TRUE)

summary(iv_model_korea)

# first_stage_korea <- lm(combat_post_1949 ~ combat_korea_instrument + combat_japan_instrument + combat_communist_instrument + long_march_instrument + age, data = df)
# 
# summary(first_stage_korea)


```

### Sino-Indian War

```{r}

iv_model_india_2 <- iv_robust(promotion_after_1988 ~ fought_india +fought_korea + fought_communist + fought_japan + participated_long_march | 
                            combat_india_cohort + fought_korea + fought_communist + fought_japan + participated_long_march, 
                          data = df %>% filter(promotion_before_1955 != 1), diagnostics = TRUE)

summary(iv_model_india)

# first_stage_vietnam <- lm(combat_post_1949 ~ combat_vietnam_instrument + combat_korea_instrument + combat_communist_instrument + combat_japan_instrument + long_march_instrument + age + cmc_chair_connection, data = df)
# 
# summary(first_stage_vietnam)
```

### Vietnam War

```{r}

iv_model_vietnam_2 <- iv_robust(promotion_after_1988 ~ fought_vietnam + fought_korea + fought_communist + fought_japan + fought_india + participated_long_march | 
                            combat_vietnam_cohort + fought_korea + fought_communist + fought_japan + fought_india + participated_long_march, 
                          data = df %>% filter(promotion_before_1955 != 1), diagnostics = TRUE)

summary(iv_model_vietnam)

# first_stage_vietnam <- lm(combat_post_1949 ~ combat_vietnam_instrument + combat_korea_instrument + combat_communist_instrument + combat_japan_instrument + long_march_instrument + age + cmc_chair_connection, data = df)
# 
# summary(first_stage_vietnam)

```

```{r}

library(broom)

coef_long_march <- tidy(iv_model_long_march_2, conf.int = TRUE)[2, ]
coef_japan_war <- tidy(iv_model_japan_war_2, conf.int = TRUE)[2, ]
coef_civil_war <- tidy(iv_model_civil_war_2, conf.int = TRUE)[2, ]
coef_korea <- tidy(iv_model_korea_2, conf.int = TRUE)[2, ]
coef_india <- tidy(iv_model_india_2, conf.int = TRUE)[2, ]
coef_vietnam <- tidy(iv_model_vietnam_2, conf.int = TRUE)[2, ]

coefficients_2 <- coef_long_march %>% 
  bind_rows(coef_japan_war) %>% 
  bind_rows(coef_civil_war) %>% 
  bind_rows(coef_korea) %>%
  bind_rows(coef_india) %>% 
  bind_rows(coef_vietnam)

coefficients_2$model <- factor(c("Long March", "Sino-Japanese War", "Civil War" ,"Korean War", "Sino-India War", "Vietnam War"), levels = c("Long March", "Sino-Japanese War", "Civil War" ,"Korean War", "Sino-India War", "Vietnam War"))

plot88 <- coefficients %>% 
  filter(!model %in% c("Sino-India War", "Vietnam War")) %>% 
  ggplot(aes(x = model, y = estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.1) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(
    title = "Post-consolidation (1988-)",
    x = NULL,
    y = "Change in Probability of Promotion"
  ) +
  theme_bw() +
  scale_y_continuous(limits = c(-1, .5)) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5))

```

## Combined Plots

```{r}

library(ggpubr)

combined_plot <- ggarrange(plot55 + scale_x_discrete(labels = c("Long March", "Japan", "Civil" ,"Korea")), plot88 + ylab(NULL)  + scale_x_discrete(labels = c("Long March", "Japan", "Civil" ,"Korea")))

combined_plot

ggsave(combined_plot, filename = "output/2sls.pdf", height = 4, width = 8)

```

## Pooled Difference Plots

```{r}
coefficients_1 <- coefficients_1 %>% mutate(
  var = std.error ^2,
  type = "before",
)

coefficients_2 <- coefficients_2 %>% mutate(
  var = std.error ^2,
  type = "after"
)

combined_coefficients <- rbind(coefficients_1[, c("term", "estimate", "var", "type")],coefficients_2[1:4, c("term", "estimate", "var", "type")])

c = qnorm(1-.05/2)

diff_df <- pivot_wider(combined_coefficients, id_cols= term, names_from = type,
            values_from = c(estimate, var)) %>% 
  mutate(diff = estimate_after - estimate_before,
         std.err = sqrt(var_before + var_after),
         term = factor(term, levels = term),
         ci.low = diff - std.err * c,
         ci.high = diff + std.err * c)

diff_p <- ggplot(diff_df, aes(x = term, y = diff, ymin = ci.low, ymax = ci.high)) + 
  geom_point() + 
  geom_errorbar(width = .2) + 
  geom_hline(yintercept = 0, color = 'red', linetype = 'dashed') + 
  scale_x_discrete(labels = c("Long March", "Japan", "Civil" ,"Korea")) +
  labs(
    y = "Change in Promotion Probabilities\n(Post minus Pre-consolidation)",
    x = "Combat Experience",
    caption = "Confidence intervals are derived from pooled standard errors\nof the before and after models at the 95% confidence level."
  ) +
  theme_bw()

ggsave(diff_p, filename = "output/diff_plot.pdf", height = 6, width = 8)
```

## Confounding Demonstration

```{r}

df_network <- df %>% 
  mutate(network = deng.network + jiang.network + hu.network + xi.network)

summary(lm(network ~ combat_pre_1955, data = df_network))
summary(lm(network ~ combat_post_1955, data = df_network))

network1 <- iv_robust(network ~ combat_pre_1955 | 
                                pre_prc_cohort, 
                              data = df_network, diagnostics = TRUE)

summary(network1)

network2 <- iv_robust(network ~ combat_post_1955 + combat_pre_1955 | 
                                post_prc_cohort + combat_pre_1955, 
                              data = df_network, diagnostics = TRUE)

summary(network2)







```

## Mediation

```{r, eval = F}
library(mediation)

mmod <- lm(network ~ combat_pre_1955, data = df_network)
ymod <- lm(promotion_before_1955 ~ network + combat_pre_1955, data = df_network)

mediate(mmod, ymod, treat = "combat_pre_1955", mediator = "network") %>% summary

mmod <- lm(network ~ combat_post_1955 + combat_pre_1955, data = df_network)
ymod <- lm(promotion_after_1988 ~ network + combat_post_1955 + combat_pre_1955, data = df_network)

mediate(mmod, ymod, treat = "combat_post_1955", mediator = "network") %>% summary
```

## Table 1: Counts of Eligible Combatants

```{r}

extract_figures <- function(x) {
  data.frame(
    Eligibility = c("Ineligible", "Ineligible", "Eligible", "Eligible"),
    Combat = c("No", "Yes", "No", "Yes"),
    Count = c(x[1], x[2], x[3], x[4])
  )
}

table1 <- list(
  table(df$participated_long_march, df$long_march_cohort),
  table(df$fought_japan, df$combat_japan_cohort),
  table(df$fought_communist, df$combat_communist_cohort),
  table(df$fought_korea, df$combat_korea_cohort),
  table(df$fought_india, df$combat_india_cohort),
  table(df$fought_vietnam, df$combat_vietnam_cohort)
) %>%
  lapply(extract_figures) %>%
  bind_rows(.id = "War") %>%  
  mutate(War = factor(War, labels = c("Long March", "Japan", "Civil War", "Korea", "India", "Vietnam"))) %>%
  pivot_wider(names_from = War, values_from = Count) 

table1 %>%
  kableExtra::kbl(booktabs = TRUE, format = "latex", align = "c", col.names = c("Eligibility", "Combat", "Long March", "Japan", "Civil War", "Korea", "India", "Vietnam")) %>%
  writeLines("output/eligibility.tex")

```

## Subset IV

```{r}
table(df$promotion_before_1955, df$cohort_decade)
table(df$promotion_after_1988, df$cohort_decade)

get_tail_end <- function(x){
  t = df[x,]$birth_year %>% max
  return(df %>% filter(birth_year %in% (t-5:t+5)))
}

iv_robust(promotion_before_1955 ~ fought_japan + participated_long_march | combat_japan_cohort + participated_long_march, 
                              data = get_tail_end(df$combat_japan_cohort), diagnostics = TRUE) %>% summary

iv_robust(promotion_after_1988 ~  fought_communist + fought_japan | combat_communist_cohort + fought_japan, 
                              data = get_tail_end(df$combat_communist_cohort), diagnostics = TRUE) %>% summary

iv_robust(promotion_after_1988 ~  fought_korea + fought_communist + fought_japan | combat_korea_cohort + combat_communist_cohort + fought_japan, 
                              data = get_tail_end(df$combat_korea_cohort), diagnostics = TRUE) %>% summary

iv_robust(promotion_after_1988 ~  fought_vietnam + fought_korea + fought_communist + fought_japan | combat_vietnam_cohort + fought_korea + fought_communist + fought_japan, 
                              data = get_tail_end(df$combat_vietnam_cohort), diagnostics = TRUE) %>% summary



```

### Placebo

```{r}
lm(promotion_after_1949 ~ as.factor(cohort_decade), data = df) %>% summary

```

## Interact revolutionary credentials of political leader
